{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "%matplotlib inline"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "\n# Demonstrating the different strategies of KBinsDiscretizer\n\n\nThis example presents the different strategies implemented in KBinsDiscretizer:\n\n- 'uniform': The discretization is uniform in each feature, which means that\n  the bin widths are constant in each dimension.\n- 'quantile': The discretization is based on the quantiles of each feature,\n  which means that each bin has approximately the same number of samples.\n- 'kmeans': The discretization is based on the centroids of a KMeans clustering\n  procedure.\n\nThe plot shows the regions where the discretized encoding is constant.\n\n"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "collapsed": false
      },
      "outputs": [],
      "source": [
        "# Author: Tom Dupr\u00e9 la Tour\n",
        "# License: BSD 3 clause\n",
        "\n",
        "# Fit KBinsDiscretizer with each strategy on three synthetic 2D datasets and\n",
        "# draw, for every (dataset, strategy) pair, the regions of the plane in which\n",
        "# the ordinal bin index of each feature is constant.\n",
        "\n",
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "from sklearn.preprocessing import KBinsDiscretizer\n",
        "from sklearn.datasets import make_blobs\n",
        "\n",
        "strategies = ['uniform', 'quantile', 'kmeans']\n",
        "\n",
        "n_samples = 200\n",
        "centers_0 = np.array([[0, 0], [0, 5], [2, 4], [8, 8]])\n",
        "centers_1 = np.array([[0, 0], [3, 1]])\n",
        "\n",
        "# construct the datasets: one uniform cloud and two sets of unbalanced blobs\n",
        "random_state = 42\n",
        "X_list = [\n",
        "    np.random.RandomState(random_state).uniform(-3, 3, size=(n_samples, 2)),\n",
        "    make_blobs(n_samples=[n_samples // 10, n_samples * 4 // 10,\n",
        "                          n_samples // 10, n_samples * 4 // 10],\n",
        "               cluster_std=0.5, centers=centers_0,\n",
        "               random_state=random_state)[0],\n",
        "    make_blobs(n_samples=[n_samples // 5, n_samples * 4 // 5],\n",
        "               cluster_std=0.5, centers=centers_1,\n",
        "               random_state=random_state)[0],\n",
        "]\n",
        "\n",
        "# One row per dataset: the first column shows the input data, every other\n",
        "# column shows one strategy. squeeze=False keeps `axes` two-dimensional\n",
        "# regardless of how many datasets or strategies are configured above.\n",
        "figure, axes = plt.subplots(len(X_list), len(strategies) + 1,\n",
        "                            figsize=(14, 9), squeeze=False)\n",
        "\n",
        "for ds_cnt, (X, row_axes) in enumerate(zip(X_list, axes)):\n",
        "\n",
        "    input_ax = row_axes[0]\n",
        "    input_ax.scatter(X[:, 0], X[:, 1], edgecolors='k')\n",
        "    if ds_cnt == 0:\n",
        "        input_ax.set_title(\"Input data\", size=14)\n",
        "\n",
        "    # dense grid spanning the data range, used to evaluate the encoding\n",
        "    xx, yy = np.meshgrid(\n",
        "        np.linspace(X[:, 0].min(), X[:, 0].max(), 300),\n",
        "        np.linspace(X[:, 1].min(), X[:, 1].max(), 300))\n",
        "    grid = np.c_[xx.ravel(), yy.ravel()]\n",
        "\n",
        "    input_ax.set_xlim(xx.min(), xx.max())\n",
        "    input_ax.set_ylim(yy.min(), yy.max())\n",
        "    input_ax.set_xticks(())\n",
        "    input_ax.set_yticks(())\n",
        "\n",
        "    # transform the dataset with KBinsDiscretizer\n",
        "    for strategy, ax in zip(strategies, row_axes[1:]):\n",
        "        enc = KBinsDiscretizer(n_bins=4, encode='ordinal', strategy=strategy)\n",
        "        enc.fit(X)\n",
        "        grid_encoded = enc.transform(grid)\n",
        "\n",
        "        # bin index of feature 0: it only changes along the x axis, so the\n",
        "        # filled regions are vertical bands\n",
        "        x_bins = grid_encoded[:, 0].reshape(xx.shape)\n",
        "        ax.contourf(xx, yy, x_bins, alpha=.5)\n",
        "        # bin index of feature 1: it only changes along the y axis, so the\n",
        "        # filled regions are horizontal bands\n",
        "        y_bins = grid_encoded[:, 1].reshape(xx.shape)\n",
        "        ax.contourf(xx, yy, y_bins, alpha=.5)\n",
        "\n",
        "        ax.scatter(X[:, 0], X[:, 1], edgecolors='k')\n",
        "        ax.set_xlim(xx.min(), xx.max())\n",
        "        ax.set_ylim(yy.min(), yy.max())\n",
        "        ax.set_xticks(())\n",
        "        ax.set_yticks(())\n",
        "        if ds_cnt == 0:\n",
        "            ax.set_title(\"strategy='%s'\" % (strategy, ), size=14)\n",
        "\n",
        "plt.tight_layout()\n",
        "plt.show()"
      ]
    }
  ],
  "metadata": {
    "kernelspec": {
      "display_name": "Python 3",
      "language": "python",
      "name": "python3"
    },
    "language_info": {
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      },
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "name": "python",
      "nbconvert_exporter": "python",
      "pygments_lexer": "ipython3",
      "version": "3.6.9"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}